<?php

# Listing pages to crawl (vehicle and real-estate sections, first three pages each).
$urls = array (
		"http://www.donbarato.com/alicante/vehiculos/",
		"http://www.donbarato.com/alicante/vehiculos/2",
		"http://www.donbarato.com/alicante/vehiculos/3",
		"http://www.donbarato.com/alicante/inmobiliaria",
		"http://www.donbarato.com/alicante/inmobiliaria/2",
		"http://www.donbarato.com/alicante/inmobiliaria/3"
		);

# Open the default MySQL link used by the mysql_* calls in this script.
# Fail fast with the driver's message instead of crawling first and only
# discovering a broken connection at the first INSERT.
# NOTE(review): the mysql_* extension was removed in PHP 7.0; migrating the
# whole script to mysqli or PDO is advisable.
mysql_connect('localhost','root','dev') or die ('Could not connect to MySQL: ' . mysql_error());
mysql_select_db('barabot') or die ('Could not select database: ' . mysql_error());


 # Decide whether the robots.txt of the URL's host permits fetching the URL.
 #
 # $url        absolute URL to test
 # $useragent  crawler name to match against User-agent lines (case-insensitive
 #             substring match); groups addressed to '*' always apply
 # $robotstxt  optional, already-fetched robots.txt content, either as an array
 #             of lines or as one string; when null the file is downloaded from
 #             the URL's scheme/host/port
 #
 # Returns true when the URL may be fetched, false when a Disallow rule of an
 # applicable group is a prefix of the URL's path. A missing or unreadable
 # robots.txt means "allowed".
 #
 # Limitations: Disallow values are treated as literal path prefixes (no '*' or
 # '$' wildcards), Allow lines are ignored, and rules of all applicable groups
 # are merged rather than picking the most specific group.
 function robots_allowed($url, $useragent=false, $robotstxt=null)
  {
    # parse url to retrieve host and path
    $parsed = parse_url($url);
    if(!is_array($parsed)) $parsed = array();

    # a URL without a path component addresses the root document
    $path = (isset($parsed['path']) && $parsed['path'] !== '') ? $parsed['path'] : '/';

    # quote with the delimiter so agents such as "Bot/1.0" cannot break the pattern
    $agents = array(preg_quote('*', '/'));
    if($useragent) $agents[] = preg_quote($useragent, '/');
    $agents = implode('|', $agents);

    if($robotstxt === null) {
      # without a host there is no robots.txt to consult
      if(empty($parsed['host'])) return true;

      # robots.txt lives at the root of the same scheme, host and port
      $scheme = isset($parsed['scheme']) ? $parsed['scheme'] : 'http';
      $port = isset($parsed['port']) ? ':' . $parsed['port'] : '';

      # best effort: a failed download is deliberately treated as "no restrictions"
      $robotstxt = @file("{$scheme}://{$parsed['host']}{$port}/robots.txt");
    } elseif(is_string($robotstxt)) {
      $robotstxt = preg_split('/\r\n|\r|\n/', $robotstxt);
    }
    if(!is_array($robotstxt) || !$robotstxt) return true;

    $rules = array();
    $ruleapplies = false;
    # true while reading the consecutive User-agent lines that open a group
    $ingroupheader = false;
    foreach($robotstxt as $line) {
      # drop inline comments, then skip blank lines
      $hash = strpos($line, '#');
      if($hash !== false) $line = substr($line, 0, $hash);
      $line = trim($line);
      if($line === '') continue;

      # following rules only apply if User-agent matches $useragent or '*'
      if(preg_match('/^User-agent\s*:\s*(.*)$/i', $line, $match)) {
        $matched = (bool) preg_match("/($agents)/i", $match[1]);
        # several User-agent lines in a row share the rules that follow them
        $ruleapplies = $ingroupheader ? ($ruleapplies || $matched) : $matched;
        $ingroupheader = true;
        continue;
      }
      $ingroupheader = false;

      if($ruleapplies && preg_match('/^Disallow\s*:\s*(.*)$/i', $line, $regs)) {
        $rule = trim($regs[1]);
        # an empty rule implies full access - no further tests required
        if($rule === '') return true;
        # add rules that apply to array for testing
        $rules[] = $rule;
      }
    }

    foreach($rules as $rule) {
      # check if page is disallowed to us (literal, case-sensitive prefix match)
      if(strncmp($path, $rule, strlen($rule)) === 0) return false;
    }

    # page is not disallowed
    return true;
  }


# Crawl every listing page and store the matching links in the `links` table.

# Identify ourselves to remote servers; set once, it applies to every HTTP
# stream request made below.
ini_set('user_agent', 'NameOfAgent (http://www.example.net)');

# Anchor tags whose href starts with /comprar or /alquilar.
# Capture groups: 1 = optional single quote around the href,
#                 2 = "comprar" or "alquilar",
#                 3 = remainder of the href,
#                 4 = inner HTML of the anchor.
# Only unquoted or single-quoted href values can match this pattern.
$regexp = "<a\s[^>]*href=(\'??)\/(comprar|alquilar)([^\" >]*?)\\1[^>]*>(.*)<\/a>";

foreach($urls as $url)
{
  # stop the whole run as soon as one page is off limits
  if(!robots_allowed($url, "NameOfAgent"))
  {
    die('Access denied by robots.txt');
  }

  # compare against false explicitly: an empty page is not a download failure
  $input = @file_get_contents($url);
  if($input === false)
  {
    die("Could not access file: $url");
  }

  if(!preg_match_all("/$regexp/siU", $input, $matches, PREG_SET_ORDER))
  {
    # no matching links on this page
    continue;
  }

  foreach($matches as $match)
  {
    # The scraped HTML is untrusted, so escape it with the connection-aware
    # escaper rather than addslashes().
    # NOTE(review): $match[0] is the complete <a ...>...</a> tag, not just the
    # href, yet it is stored in the `url` column - confirm this is intended.
    $query = sprintf(
      'INSERT INTO links (url,title) VALUES ("%s","%s")',
      mysql_real_escape_string($match[0]),
      mysql_real_escape_string($match[4])
    );
    mysql_query($query) or die (mysql_error());
  }
}